In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import re
import json
import copy

1. Data Preparation¶

In [2]:
# Shared state for the notebook: the container that collects every DataFrame
# built below, the directory holding the batch-run logs, and the names of the
# two log files inside it.
data_dict = {}
log_path = os.path.abspath(r"./TestingBatchRunData_02/log")
setup_csv_filename, test_log_jsonl_filename = "setup_log.csv", "test_log.jsonl"
In [3]:
# Read cost data for setup.sh.
# The log separates fields with ", " (comma + space); the first non-empty line
# is the header and every following non-empty line is one data record.
file_path = os.path.join(log_path, setup_csv_filename)
cols = None
data = []
# `with` closes the handle even if parsing raises part-way through the file.
with open(file_path, "r") as log_file:
    for line in log_file:
        # Strip only the line terminator, so a final line without a trailing
        # newline keeps the last character of its last field.
        line = line.rstrip("\r\n")
        if not line:
            # Blank lines (e.g. at the end of the file) carry no record.
            continue
        fields = line.split(", ")
        if cols is None:
            cols = fields
        else:
            data.append(fields)
# All values are still strings here; dtypes are assigned in the preprocessing cell.
data_dict['setup'] = pd.DataFrame(data, columns = cols)
In [4]:
# Data Preprocessing
# Build the cleaned frame from a renamed copy and assign it back once at the
# end, so the cell can be re-run: the misspelled 'citcuit' header is renamed
# first and every later step only refers to 'circuit'.
numeric_columns = ['nVoters','nConstraints','t_compile','t_proofGen','provingKeySize']
setup_df = data_dict['setup'].rename({'citcuit':'circuit'}, axis = 1)
setup_df['Datetime'] = pd.to_datetime(setup_df['Datetime'], format = "%Y%m%d%H%M%S")
# Ordered categorical: sorting on 'circuit' follows this order instead of
# alphabetical order.
setup_df['circuit'] = pd.Categorical(setup_df['circuit'],
                                     categories=['PublicKeyGen',
                                                 'encryptedVoteGen',
                                                 'tallying'],
                                     ordered=True)
# The measurement columns are converted to float for sorting and plotting.
setup_df = setup_df.astype({column: 'float' for column in numeric_columns})
data_dict['setup'] = (
    setup_df
    .sort_values(['nVoters','Datetime','circuit'])
    .drop_duplicates(ignore_index = True)
)
In [5]:
# Display the preprocessed setup-cost table.
data_dict['setup']
Out[5]:
Datetime nVoters design circuit nConstraints t_compile t_proofGen provingKeySize
0 2022-11-08 14:53:06 20.0 original PublicKeyGen 1291.0 4240.0 19851.0 807556.0
1 2022-11-08 14:53:06 20.0 original encryptedVoteGen 13515.0 32272.0 65268.0 8168388.0
2 2022-11-08 14:53:06 20.0 original tallying 11380.0 27430.0 61442.0 7240904.0
3 2022-11-09 00:26:21 20.0 original PublicKeyGen 1291.0 5168.0 19505.0 807556.0
4 2022-11-09 00:26:21 20.0 original encryptedVoteGen 13515.0 32476.0 64541.0 8168388.0
5 2022-11-09 00:26:21 20.0 original tallying 11380.0 26296.0 61808.0 7240904.0
6 2022-11-03 13:28:44 30.0 original PublicKeyGen 1291.0 5107.0 21023.0 807556.0
7 2022-11-03 13:28:44 30.0 original encryptedVoteGen 18995.0 47584.0 93600.0 12198244.0
8 2022-11-03 13:28:44 30.0 original tallying 16680.0 43778.0 90582.0 11188320.0
9 2022-11-03 23:50:01 30.0 original PublicKeyGen 1291.0 4890.0 22941.0 807556.0
10 2022-11-03 23:50:01 30.0 original encryptedVoteGen 18995.0 46174.0 94019.0 12198244.0
11 2022-11-03 23:50:01 30.0 original tallying 16680.0 40201.0 88261.0 11188320.0
12 2022-11-04 17:03:08 30.0 original PublicKeyGen 1291.0 4505.0 21179.0 807556.0
13 2022-11-04 17:03:08 30.0 original encryptedVoteGen 18995.0 44038.0 92883.0 12198244.0
14 2022-11-04 17:03:08 30.0 original tallying 16680.0 41354.0 90393.0 11188320.0
15 2022-11-06 15:04:03 40.0 original PublicKeyGen 1291.0 4667.0 19656.0 807556.0
16 2022-11-06 15:04:03 40.0 original encryptedVoteGen 24555.0 60738.0 100702.0 15222724.0
17 2022-11-06 15:04:03 40.0 original tallying 21980.0 53611.0 96032.0 14087160.0
18 2022-11-07 15:36:55 50.0 original PublicKeyGen 1291.0 5229.0 19395.0 807556.0
19 2022-11-07 15:36:55 50.0 original encryptedVoteGen 30055.0 71869.0 111056.0 18214804.0
20 2022-11-07 15:36:55 50.0 original tallying 27280.0 66160.0 106529.0 16986000.0
21 2022-11-05 02:02:33 60.0 original PublicKeyGen 1291.0 4298.0 19747.0 807556.0
22 2022-11-05 02:02:33 60.0 original encryptedVoteGen 35555.0 84574.0 145939.0 23314684.0
23 2022-11-05 02:02:33 60.0 original tallying 32580.0 80250.0 120813.0 19884840.0
24 2022-11-05 12:00:42 60.0 original PublicKeyGen 1291.0 4615.0 19003.0 807556.0
25 2022-11-05 12:00:42 60.0 original encryptedVoteGen 35555.0 86747.0 145415.0 23314684.0
26 2022-11-05 12:00:42 60.0 original tallying 32580.0 81846.0 121732.0 19884840.0
In [6]:
# Read cost data from test_log.jsonl (the cost data for each test voting).
# The file is JSON Lines; each record carries a 'steps' list of dicts.
file_path = os.path.join(log_path, test_log_jsonl_filename)
jsonObj = pd.read_json(path_or_buf=file_path, lines=True)

# One row per step: explode the per-record list of step dicts.
data_dict['test'] = jsonObj.explode(['steps'],ignore_index =True)

# Every key used by any step dict, in first-seen order
# (dict.fromkeys de-duplicates while keeping insertion order).
field_names = list(dict.fromkeys(
    key for step in data_dict['test']['steps'] for key in step
))

# Promote each step field to its own "step__<field>" column; steps that lack
# the field get None (shown as NaN in numeric columns).
for col_suffix in field_names:
    data_dict['test'][f"step__{col_suffix}"] = pd.Series(
        [step.get(col_suffix) for step in data_dict['test']['steps']],
        index=data_dict['test'].index,
    )
In [7]:
# Step names of rows labelled 0..14 (.loc slicing is end-inclusive, so 15 names).
# NOTE(review): presumably these are the steps of one complete run - confirm
# that the first logged run is complete.
full_run_steps = data_dict['test']['step__name'].loc[0:14].tolist()
In [8]:
# Label every row with its vote-generation family followed by the first entry
# of 'params' formatted to one decimal place, e.g. "sequential0[0.0]".
test_rows = data_dict['test']
data_dict['test']['vote_gen_family__params'] = [
    family + f"[{params[0]:.1f}]"
    for family, params in zip(test_rows['vote_gen_family'], test_rows['params'])
]
In [9]:
# Preview the first 20 step rows of the exploded test log.
display(data_dict['test'].head(20))
start_time n_voters vote_gen_family params steps step__name step__start_time step__duration step__status_ok step__message step__cost vote_gen_family__params
0 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Generate Testing Data', 'start_time'... Generate Testing Data 1667482165641 144974 True 30 NaN sequential0[0.0]
1 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Deploy the contracts', 'start_time':... Deploy the contracts 1667482310623 67 True None NaN sequential0[0.0]
2 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: PublicKey', '... Set Verification keys: PublicKey 1667482310697 383 True None 462204.0 sequential0[0.0]
3 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: EncrpytedVote... Set Verification keys: EncrpytedVote 1667482311080 980 True None 1815435.0 sequential0[0.0]
4 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... Set Verification keys: Tallying 1667482312060 687 True None NaN sequential0[0.0]
5 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Register public keys for elligible u... Register public keys for elligible users excep... 1667482312777 30303 True None 10167440.0 sequential0[0.0]
6 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Throw an error if non-elligable user... Throw an error if non-elligable user tries to ... 1667482343143 543 True None NaN sequential0[0.0]
7 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Throw an error if sender already reg... Throw an error if sender already registered 1667482343826 1670 True None NaN sequential0[0.0]
8 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Register public key of the last vote... Register public key of the last voter 1667482345564 1068 True None 350144.0 sequential0[0.0]
9 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Throw an error if an user tries to r... Throw an error if an user tries to register bu... 1667482346706 146 True None NaN sequential0[0.0]
10 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Cast valid encrypted votes', 'start_... Cast valid encrypted votes 1667482346865 49306 True None 23463802.0 sequential0[0.0]
11 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Throw an error if elligable user pro... Throw an error if elligable user provides inva... 1667482396263 2664 True None NaN sequential0[0.0]
12 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Malicious Administrator', 'start_tim... Malicious Administrator 1667482399033 7014 True None NaN sequential0[0.0]
13 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Honest Administrator', 'start_time':... Honest Administrator 1667482406110 4365 True None 750458.0 sequential0[0.0]
14 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Refund deposits for all', 'start_tim... Refund deposits for all 1667482410520 4515 True None 52297.0 sequential0[0.0]
15 2022-11-03 13:34:09.862000+00:00 30 sequential0 [0] {'name': 'Generate Testing Data', 'start_time'... Generate Testing Data 1667482450022 124970 True 30 NaN sequential0[0.0]
16 2022-11-03 13:34:09.862000+00:00 30 sequential0 [0] {'name': 'Deploy the contracts', 'start_time':... Deploy the contracts 1667482575002 61 True None NaN sequential0[0.0]
17 2022-11-03 13:34:09.862000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: PublicKey', '... Set Verification keys: PublicKey 1667482575069 190 True None 462204.0 sequential0[0.0]
18 2022-11-03 13:34:09.862000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: EncrpytedVote... Set Verification keys: EncrpytedVote 1667482575259 534 True None 1815435.0 sequential0[0.0]
19 2022-11-03 13:34:09.862000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... Set Verification keys: Tallying 1667482575793 526 True None NaN sequential0[0.0]
In [10]:
import matplotlib
import plotly
import plotly.express as px
import plotly.graph_objects as go
In [11]:
# List the setup-table columns available for plotting.
data_dict['setup'].columns
Out[11]:
Index(['Datetime', 'nVoters', 'design', 'circuit', 'nConstraints', 't_compile',
       't_proofGen', 'provingKeySize'],
      dtype='object')

2. Cost of Setup¶

In [12]:
# One scatter figure per setup metric against the number of voters, faceted by
# circuit (facet_col_wrap=1 puts each circuit on its own row).
setup_metrics = ['nConstraints', 't_compile','t_proofGen', 'provingKeySize']
for y in setup_metrics:
    fig = px.scatter(
        data_dict['setup'],
        x = 'nVoters',
        y = y,
        title = y,
        facet_col = "circuit",
        facet_col_wrap = 1,
    )
    # Clear `matches` on the numbered y-axes so the facets stop sharing one y range.
    numbered_yaxes = [k for k in fig.layout if re.search('yaxis[1-9]+', k)]
    for axis_key in numbered_yaxes:
        fig.layout[axis_key].update(matches=None)
    fig.show()
    

3. Cost of Test Run¶

In [13]:
# Display the exploded test log (one row per step).
data_dict['test']
Out[13]:
start_time n_voters vote_gen_family params steps step__name step__start_time step__duration step__status_ok step__message step__cost vote_gen_family__params
0 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Generate Testing Data', 'start_time'... Generate Testing Data 1667482165641 144974 True 30 NaN sequential0[0.0]
1 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Deploy the contracts', 'start_time':... Deploy the contracts 1667482310623 67 True None NaN sequential0[0.0]
2 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: PublicKey', '... Set Verification keys: PublicKey 1667482310697 383 True None 462204.0 sequential0[0.0]
3 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: EncrpytedVote... Set Verification keys: EncrpytedVote 1667482311080 980 True None 1815435.0 sequential0[0.0]
4 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... Set Verification keys: Tallying 1667482312060 687 True None NaN sequential0[0.0]
... ... ... ... ... ... ... ... ... ... ... ... ...
17238 2022-11-09 02:31:20.199000+00:00 20 permutation [1] {'name': 'Cast valid encrypted votes', 'start_... Cast valid encrypted votes 1667961169673 26462 True None 12629547.0 permutation[1.0]
17239 2022-11-09 02:31:20.199000+00:00 20 permutation [1] {'name': 'Throw an error if elligable user pro... Throw an error if elligable user provides inva... 1667961196183 2079 True None NaN permutation[1.0]
17240 2022-11-09 02:31:20.199000+00:00 20 permutation [1] {'name': 'Malicious Administrator', 'start_tim... Malicious Administrator 1667961198342 5609 True None NaN permutation[1.0]
17241 2022-11-09 02:31:20.199000+00:00 20 permutation [1] {'name': 'Honest Administrator', 'start_time':... Honest Administrator 1667961204010 3762 True None 580245.0 permutation[1.0]
17242 2022-11-09 02:31:20.199000+00:00 20 permutation [1] {'name': 'Refund deposits for all', 'start_tim... Refund deposits for all 1667961207813 3752 True None 52297.0 permutation[1.0]

17243 rows × 12 columns

3.1 Assign step index for grouping and sorting purposes¶

In [14]:
# Number of leading rows used as the reference step ordering.
# NOTE(review): assumes the first 15 rows are the steps of one complete run - confirm.
N_STEPS_PER_RUN = 15

# Remove an "NN_" prefix left by an earlier execution of this cell, so that
# re-running it does not stack prefixes ("00_00_...").
step_names = data_dict['test']["step__name"].str.replace(r"^\d{2}_", "", regex=True)

# Step name -> zero-padded position within the reference run.
step_index = {
    name: f"{position:02}"
    for position, name in enumerate(step_names.iloc[0:N_STEPS_PER_RUN])
}

# Prefix every step name with its index so that grouping and sorting by name
# follow the execution order. A name missing from the reference run raises KeyError.
data_dict['test']["step__name"] = step_names.map(lambda name: f"{step_index[name]}_{name}")

3.2 Filter out unstructured test cases¶

In [15]:
# Keep only the rows that satisfy all three conditions: n_voters is not 20, the
# family/params label does not contain "independent", and the label is not
# 'sequential0[0.1]'.
all_steps = data_dict['test']
family_label = all_steps["vote_gen_family__params"]
not_20_voters = all_steps["n_voters"] != 20
not_independent = ~family_label.str.contains("independent")
not_sequential0_01 = family_label != 'sequential0[0.1]'
test_sample = all_steps[not_20_voters & (not_independent & not_sequential0_01)]

3.3 Check error messages in steps¶

In [16]:
# Inspect every row of step 04 ("Set Verification keys: Tallying") in the filtered sample.
test_sample[test_sample['step__name'] == "04_Set Verification keys: Tallying"]
Out[16]:
start_time n_voters vote_gen_family params steps step__name step__start_time step__duration step__status_ok step__message step__cost vote_gen_family__params
4 2022-11-03 13:29:25.499000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667482312060 687 True None NaN sequential0[0.0]
19 2022-11-03 13:34:09.862000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667482575793 526 True None NaN sequential0[0.0]
34 2022-11-03 13:38:17.348000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667482822326 538 True None NaN sequential0[0.0]
49 2022-11-03 13:42:24.246000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667483070782 488 True None NaN sequential0[0.0]
64 2022-11-03 13:46:31.008000+00:00 30 sequential0 [0] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667483316961 446 True None NaN sequential0[0.0]
... ... ... ... ... ... ... ... ... ... ... ... ...
14029 2022-11-08 13:02:51.688000+00:00 50 permutation [1] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667912802653 679 True None NaN permutation[1.0]
14044 2022-11-08 13:09:48.520000+00:00 50 permutation [1] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667913220029 679 True None NaN permutation[1.0]
14059 2022-11-08 13:16:52.921000+00:00 50 permutation [1] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667913643116 645 True None NaN permutation[1.0]
14074 2022-11-08 13:23:46.087000+00:00 50 permutation [1] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667914056675 672 True None NaN permutation[1.0]
14089 2022-11-08 13:30:42.375000+00:00 50 permutation [1] {'name': 'Set Verification keys: Tallying', 's... 04_Set Verification keys: Tallying 1667914474958 758 True None NaN permutation[1.0]

919 rows × 12 columns

In [17]:
# Rows of the filtered sample whose step__status_ok flag is False.
test_sample[test_sample['step__status_ok']==False]
Out[17]:
start_time n_voters vote_gen_family params steps step__name step__start_time step__duration step__status_ok step__message step__cost vote_gen_family__params
6224 2022-11-05 15:33:25.180000+00:00 60 sequential0 [0.6000000000000001] {'name': 'Refund deposits for all', 'start_tim... 14_Refund deposits for all 1667662935159 5704 False Error: Returned error: VM Exception while proc... 52297.0 sequential0[0.6]
6929 2022-11-05 22:58:22.132000+00:00 60 sequential1 [0.4] {'name': 'Refund deposits for all', 'start_tim... 14_Refund deposits for all 1667689633128 5931 False Error: Returned error: VM Exception while proc... 52297.0 sequential1[0.4]
In [18]:
# Full step__message text of the failed steps, as a plain list.
test_sample[test_sample['step__status_ok']==False]['step__message'].tolist()
Out[18]:
['Error: Returned error: VM Exception while processing transaction: revert Illegal reclaim -- Reason given: Illegal reclaim.; ',
 'Error: Returned error: VM Exception while processing transaction: revert Illegal reclaim -- Reason given: Illegal reclaim.; ']

3.4 Generating box plots for runs¶

In [19]:
# For every step and every metric it reports, (a) collect min/mean/max records
# per (n_voters, vote-gen config) into test_summary_avg_dict_list and
# (b) draw a box plot of the raw values.

# Display names of the plotted columns; 'DURATION' / 'COST' are also the value
# keys of the summary records.
column_rename_dict = {
    "step__duration" : "DURATION",
    "step__cost" : "COST",
    "vote_gen_family__params" : 'Vote-Gen Family & Params'
}
title_len_limit = 60

test_summary_avg_dict_list = []
# Sorting fixes the row order that each box plot receives.
test_sample_sorted = test_sample.sort_values(["n_voters","vote_gen_family__params"], ignore_index = True)

# Group by a scalar key rather than a one-element list: with a list key,
# pandas >= 2.0 yields 1-tuples, which would end up in the titles and in the
# 'step__name' field of the summary records.
for step__name, df in test_sample_sorted.groupby("step__name"):
    for y in ["step__duration","step__cost"]:
        if df[y].isna().all():
            # This step never reports the metric; nothing to summarise or plot.
            continue
        metric_label = column_rename_dict[y]
        df_temp = df[[y,'vote_gen_family__params','vote_gen_family','n_voters','params']].rename(column_rename_dict, axis = 1)

        # One record per group and aggregate. The key order of each dict sets
        # the column order of the summary frame built from this list.
        grouped = df_temp[["n_voters",'Vote-Gen Family & Params',metric_label]].groupby(["n_voters",'Vote-Gen Family & Params'])
        for (n_voters, vgfp), df_group in grouped:
            for agg in ['min','mean','max']:
                test_summary_avg_dict_list.append({
                    "step__name" : step__name,
                    "n_voters" : n_voters,
                    'Vote-Gen Family & Params' : vgfp,
                    'Agg' : agg,
                    metric_label : df_group[metric_label].aggregate(agg)
                })

        # Truncate long step names so the figure title stays readable.
        title = step__name
        if len(title) > title_len_limit:
            title = title[:title_len_limit]+'...'
        title = f"Step : \"{title}\" {metric_label}"

        fig = px.box(df_temp,
                     x = 'Vote-Gen Family & Params',
                     y = metric_label,
                     title = title, color = 'vote_gen_family',
                     points = "all",
                     facet_row = 'n_voters',
                     width=1000, height=1000)
        # Clear `matches` on the numbered y-axes so the facet rows stop sharing one y range.
        for k in fig.layout:
            if re.search('yaxis[1-9]+', k):
                fig.layout[k].update(matches=None)

        fig.show()

3.5 Aggregate Summary¶

In [20]:
# Turn the aggregate records into one frame with DURATION and COST side by side.
# Each record carries only one of the two metrics, so the frame is split into
# its DURATION rows and its COST rows and joined back on the key columns.
summary_records_df = pd.DataFrame(test_summary_avg_dict_list)
# The first four columns are the join key.
merge_column = list(summary_records_df.columns[:4])
duration_rows = summary_records_df[merge_column + ['DURATION']].dropna()
cost_rows = summary_records_df[merge_column + ['COST']].dropna()
# Left join keeps every DURATION row; rows with no COST match get 0.0.
test_agg_summary_df = duration_rows.merge(cost_rows, how = 'left', on = merge_column).fillna(0.0)
In [21]:
# Show the summary with at most one row per key combination (display only;
# test_agg_summary_df itself is not modified).
test_agg_summary_df.drop_duplicates(merge_column)
Out[21]:
step__name n_voters Vote-Gen Family & Params Agg DURATION COST
0 00_Generate Testing Data 30 permutation[0.0] min 124307.00 0.0
1 00_Generate Testing Data 30 permutation[0.0] mean 125187.35 0.0
2 00_Generate Testing Data 30 permutation[0.0] max 126149.00 0.0
3 00_Generate Testing Data 30 permutation[0.2] min 124172.00 0.0
4 00_Generate Testing Data 30 permutation[0.2] mean 125329.10 0.0
... ... ... ... ... ... ...
3235 14_Refund deposits for all 60 sequential1[0.8] mean 6094.20 52297.0
3236 14_Refund deposits for all 60 sequential1[0.8] max 6811.00 52297.0
3237 14_Refund deposits for all 60 sequential1[1.0] min 5644.00 52297.0
3238 14_Refund deposits for all 60 sequential1[1.0] mean 5888.60 52297.0
3239 14_Refund deposits for all 60 sequential1[1.0] max 6195.00 52297.0

3240 rows × 6 columns

In [22]:
# One bar figure per aggregate (max / mean / min), with the legend of steps shown.
# The aggregate name is part of the title; without it the three figures are
# indistinguishable.
for Agg ,df in test_agg_summary_df.groupby('Agg'):
    fig_bar = px.bar(df,
                     x = 'Vote-Gen Family & Params',
                     y = "DURATION",
                     title = f'Aggregate Duration ({Agg})',
                     color = 'step__name',
                     facet_row = 'n_voters',
                     width=1000, height=1000)
    fig_bar.show()
In [25]:
# DURATION bars per aggregate, coloured by step and faceted by n_voters, with
# the legend hidden.
duration_bar_options = dict(
    x = 'Vote-Gen Family & Params',
    y = "DURATION",
    color = 'step__name',
    facet_row = 'n_voters',
    width = 1000,
    height = 1000,
)
for agg_name, agg_rows in test_agg_summary_df.groupby('Agg'):
    fig_bar = px.bar(agg_rows, title = f'{agg_name} Duration', **duration_bar_options)
    fig_bar.update_layout(showlegend=False)
    fig_bar.show()
In [26]:
# COST bars per aggregate, coloured by step and faceted by n_voters, with the
# legend hidden.
cost_bar_options = dict(
    x = 'Vote-Gen Family & Params',
    y = "COST",
    color = 'step__name',
    facet_row = 'n_voters',
    width = 1000,
    height = 1000,
)
for agg_name, agg_rows in test_agg_summary_df.groupby('Agg'):
    fig_bar = px.bar(agg_rows, title = f'{agg_name} Cost', **cost_bar_options)
    fig_bar.update_layout(showlegend=False)
    fig_bar.show()
In [27]:
# Sum the per-step means into one total per (n_voters, vote-gen config), then
# average those totals over the configs of each n_voters.
# Only the numeric columns are aggregated: summing the string columns
# ('step__name', 'Agg') concatenates text, and averaging them raises a
# TypeError on pandas >= 2.0.
value_columns = ['DURATION', 'COST']
mean_rows = test_agg_summary_df[test_agg_summary_df['Agg']=='mean']
run_totals = mean_rows.groupby(['n_voters','Vote-Gen Family & Params'],
                               as_index=False)[value_columns].sum()
df = run_totals.groupby("n_voters")[value_columns].mean()
In [28]:
# Line plot of the averaged total DURATION, indexed by n_voters.
px.line(df['DURATION'])
In [29]:
# Line plot of the averaged total COST, indexed by n_voters.
px.line(df['COST'])
In [30]:
# Show only the 'mean' aggregate rows of the summary.
test_agg_summary_df[test_agg_summary_df['Agg']=='mean']
Out[30]:
step__name n_voters Vote-Gen Family & Params Agg DURATION COST
1 00_Generate Testing Data 30 permutation[0.0] mean 125187.35 0.0
4 00_Generate Testing Data 30 permutation[0.2] mean 125329.10 0.0
7 00_Generate Testing Data 30 permutation[0.4] mean 124857.90 0.0
10 00_Generate Testing Data 30 permutation[0.6] mean 125529.85 0.0
13 00_Generate Testing Data 30 permutation[0.8] mean 125720.45 0.0
... ... ... ... ... ... ...
3226 14_Refund deposits for all 60 sequential1[0.2] mean 5864.10 52297.0
3229 14_Refund deposits for all 60 sequential1[0.4] mean 5925.50 52297.0
3232 14_Refund deposits for all 60 sequential1[0.6] mean 5962.90 52297.0
3235 14_Refund deposits for all 60 sequential1[0.8] mean 6094.20 52297.0
3238 14_Refund deposits for all 60 sequential1[1.0] mean 5888.60 52297.0

1080 rows × 6 columns

In [ ]: